attendance <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/attendance.csv")
## Parsed with column specification:
## cols(
## team = col_character(),
## team_name = col_character(),
## year = col_double(),
## total = col_double(),
## home = col_double(),
## away = col_double(),
## week = col_double(),
## weekly_attendance = col_double()
## )
standings <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/standings.csv")
## Parsed with column specification:
## cols(
## team = col_character(),
## team_name = col_character(),
## year = col_double(),
## wins = col_double(),
## loss = col_double(),
## points_for = col_double(),
## points_against = col_double(),
## points_differential = col_double(),
## margin_of_victory = col_double(),
## strength_of_schedule = col_double(),
## simple_rating = col_double(),
## offensive_ranking = col_double(),
## defensive_ranking = col_double(),
## playoffs = col_character(),
## sb_winner = col_character()
## )
games <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/games.csv")
## Parsed with column specification:
## cols(
## year = col_double(),
## week = col_character(),
## home_team = col_character(),
## away_team = col_character(),
## winner = col_character(),
## tie = col_character(),
## day = col_character(),
## date = col_character(),
## time = col_time(format = ""),
## pts_win = col_double(),
## pts_loss = col_double(),
## yds_win = col_double(),
## turnovers_win = col_double(),
## yds_loss = col_double(),
## turnovers_loss = col_double(),
## home_team_name = col_character(),
## home_team_city = col_character(),
## away_team_name = col_character(),
## away_team_city = col_character()
## )
# Install pkg by devtools
devtools::install_github("thebioengineer/tidytuesdayR")
## Load the data
tuesdata <- tidytuesdayR::tt_load("2020-02-04")
## OR
tuesdata <- tidytuesdayR::tt_load(2020, week = 6)
## Get the individual tables
attendance <- tuesdata$attendance
Check out path and change if necessary
getwd()
## [1] "/home/danicassol/TidyTuesday/R_code"
# setwd('./TidyTuesday/')
dir()
## [1] "g1_plot.gif" "TidyTuesday_cache"
## [3] "TidyTuesday.Rmd"
Load Data:
attendance <- read.csv(file = "../data/2020/2020-02-04/attendance.csv")
standings <- read.csv(file = "../data/2020/2020-02-04/standings.csv")
games <- read.csv(file = "../data/2020/2020-02-04/games.csv")
attendance
## # A tibble: 10,846 x 8
## team team_name year total home away week
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ariz… Cardinals 2000 893926 387475 506451 1
## 2 Ariz… Cardinals 2000 893926 387475 506451 2
## 3 Ariz… Cardinals 2000 893926 387475 506451 3
## 4 Ariz… Cardinals 2000 893926 387475 506451 4
## 5 Ariz… Cardinals 2000 893926 387475 506451 5
## 6 Ariz… Cardinals 2000 893926 387475 506451 6
## 7 Ariz… Cardinals 2000 893926 387475 506451 7
## 8 Ariz… Cardinals 2000 893926 387475 506451 8
## 9 Ariz… Cardinals 2000 893926 387475 506451 9
## 10 Ariz… Cardinals 2000 893926 387475 506451 10
## # … with 10,836 more rows, and 1 more variable:
## # weekly_attendance <dbl>
dim(attendance)
## [1] 10846 8
colnames(attendance)
## [1] "team" "team_name"
## [3] "year" "total"
## [5] "home" "away"
## [7] "week" "weekly_attendance"
unique(attendance$team)
## [1] "Arizona" "Atlanta" "Baltimore"
## [4] "Buffalo" "Carolina" "Chicago"
## [7] "Cincinnati" "Cleveland" "Dallas"
## [10] "Denver" "Detroit" "Green Bay"
## [13] "Indianapolis" "Jacksonville" "Kansas City"
## [16] "Miami" "Minnesota" "New England"
## [19] "New Orleans" "New York" "Oakland"
## [22] "Philadelphia" "Pittsburgh" "San Diego"
## [25] "San Francisco" "Seattle" "St. Louis"
## [28] "Tampa Bay" "Tennessee" "Washington"
## [31] "Houston" "Los Angeles"
summary(attendance$team)
## Length Class Mode
## 10846 character character
unique(attendance$year)
## [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
## [12] 2011 2012 2013 2014 2015 2016 2017 2018 2019
summary(attendance$year)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2000 2005 2010 2010 2015 2019
any(is.na(attendance$weekly_attendance))
## [1] TRUE
str(attendance)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10846 obs. of 8 variables:
## $ team : chr "Arizona" "Arizona" "Arizona" "Arizona" ...
## $ team_name : chr "Cardinals" "Cardinals" "Cardinals" "Cardinals" ...
## $ year : num 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ total : num 893926 893926 893926 893926 893926 ...
## $ home : num 387475 387475 387475 387475 387475 ...
## $ away : num 506451 506451 506451 506451 506451 ...
## $ week : num 1 2 3 4 5 6 7 8 9 10 ...
## $ weekly_attendance: num 77434 66009 NA 71801 66985 ...
## - attr(*, "spec")=
## .. cols(
## .. team = col_character(),
## .. team_name = col_character(),
## .. year = col_double(),
## .. total = col_double(),
## .. home = col_double(),
## .. away = col_double(),
## .. week = col_double(),
## .. weekly_attendance = col_double()
## .. )
combine <- attendance %>% left_join(standings, by = c("team",
"team_name", "year"))
combine <- combine %>% mutate(full_name = paste(team, team_name,
sep = "_"))
col_remove <- names(combine) %in% c("full_name")
combine[, !col_remove]
## # A tibble: 10,846 x 20
## team team_name year total home away week
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ariz… Cardinals 2000 893926 387475 506451 1
## 2 Ariz… Cardinals 2000 893926 387475 506451 2
## 3 Ariz… Cardinals 2000 893926 387475 506451 3
## 4 Ariz… Cardinals 2000 893926 387475 506451 4
## 5 Ariz… Cardinals 2000 893926 387475 506451 5
## 6 Ariz… Cardinals 2000 893926 387475 506451 6
## 7 Ariz… Cardinals 2000 893926 387475 506451 7
## 8 Ariz… Cardinals 2000 893926 387475 506451 8
## 9 Ariz… Cardinals 2000 893926 387475 506451 9
## 10 Ariz… Cardinals 2000 893926 387475 506451 10
## # … with 10,836 more rows, and 13 more variables:
## # weekly_attendance <dbl>, wins <dbl>, loss <dbl>,
## # points_for <dbl>, points_against <dbl>,
## # points_differential <dbl>, margin_of_victory <dbl>,
## # strength_of_schedule <dbl>, simple_rating <dbl>,
## # offensive_ranking <dbl>, defensive_ranking <dbl>,
## # playoffs <chr>, sb_winner <chr>
row_remove <- combine$team %in% "Arizona"
combine[!row_remove, ]
## # A tibble: 10,506 x 21
## team team_name year total home away week
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Atla… Falcons 2000 964579 422814 541765 1
## 2 Atla… Falcons 2000 964579 422814 541765 2
## 3 Atla… Falcons 2000 964579 422814 541765 3
## 4 Atla… Falcons 2000 964579 422814 541765 4
## 5 Atla… Falcons 2000 964579 422814 541765 5
## 6 Atla… Falcons 2000 964579 422814 541765 6
## 7 Atla… Falcons 2000 964579 422814 541765 7
## 8 Atla… Falcons 2000 964579 422814 541765 8
## 9 Atla… Falcons 2000 964579 422814 541765 9
## 10 Atla… Falcons 2000 964579 422814 541765 10
## # … with 10,496 more rows, and 14 more variables:
## # weekly_attendance <dbl>, wins <dbl>, loss <dbl>,
## # points_for <dbl>, points_against <dbl>,
## # points_differential <dbl>, margin_of_victory <dbl>,
## # strength_of_schedule <dbl>, simple_rating <dbl>,
## # offensive_ranking <dbl>, defensive_ranking <dbl>,
## # playoffs <chr>, sb_winner <chr>, full_name <chr>
combine <- combine %>% group_by(full_name, year) %>% mutate(avg_home_att = round(mean(home/8),
0)) %>% ungroup()
att_home <- combine %>% select(full_name, year, avg_home_att,
playoffs) %>% distinct()
att_home_summary <- att_home %>% group_by(full_name, playoffs) %>%
mutate(avg_home_att_poff = round(mean(avg_home_att), 0)) %>%
ungroup() %>% select(full_name, playoffs, avg_home_att_poff) %>%
distinct()
library(RColorBrewer)
n <- length(unique(games$home_team))
qual_col_pals <- brewer.pal.info[brewer.pal.info$category ==
"qual", ]
nflcol <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors,
rownames(qual_col_pals)))
# pie(rep(1,n), col=sample(nflcol, n))
plotdat <- games %>% filter(year == 2019) %>% group_by(home_team_name) %>%
summarise(pts_win = mean(pts_win, na.rm = TRUE), pts_loss = mean(pts_loss,
na.rm = TRUE)) %>% arrange(pts_win) %>% mutate(home_team_name = factor(home_team_name,
levels = .$home_team_name))
ggplot(plotdat, aes(x = pts_win, y = pts_loss, col = home_team_name,
label = home_team_name)) + geom_text(size = 3) + scale_color_manual(values = nflcol) +
guides(col = FALSE) + theme_light() + labs(x = "Mean pts_win",
y = "Mean pts_loss", title = "2019 NFL pts_win and pts_loss Comparison (Home team)")
Code based on link
top <- attendance %>% filter(!is.na(weekly_attendance)) %>% group_by(team_name) %>%
summarise(n = sum(weekly_attendance)) %>% top_n(4)
## Selecting by n
df <- attendance %>% # filter(!is.na(weekly_attendance)) %>%
filter(team_name %in% top$team_name)
g <- ggplot(df, aes(x = year, y = as.factor(week))) + scale_x_continuous(position = "top") +
scale_fill_paletteer_c("grDevices::Greens", direction = -1) +
geom_tile(data = subset(df, !is.na(weekly_attendance)), aes(fill = weekly_attendance),
color = "grey12") + geom_tile(data = subset(df, is.na(weekly_attendance)),
fill = "grey20", color = "grey12") + facet_wrap(~team_name,
nrow = 2, strip.position = "bottom", scales = "free") + labs(title = "Weekly Attendance",
subtitle = "Top 4", x = "Year", y = "Week", fill = "Rate",
caption = "Data: 'NFL Attendance'") + theme(panel.grid = element_blank(),
axis.ticks.y = element_line(color = "grey76"), legend.position = "none",
legend.background = element_rect(fill = "grey10"), legend.key.size = unit(1.5,
"cm"), panel.background = element_rect(fill = "grey10",
color = "grey10"), plot.background = element_rect(fill = "grey10"),
strip.background = element_rect(fil = "grey20"), panel.spacing = unit(2,
"lines"), plot.title = element_text(size = 28, color = "grey76",
hjust = 0.5), plot.subtitle = element_text(size = 20,
color = "grey76", hjust = 0.5), plot.caption = element_text(size = 14,
color = "grey76", hjust = 0.99), axis.text = element_text(family = "Roboto Mono",
size = 14, colour = "grey76"), strip.text.x = element_text(family = "Roboto Mono",
size = 14, colour = "grey76"), axis.title = element_text(family = "Roboto Mono",
size = 20, colour = "white"), legend.text = element_text(family = "Roboto Mono",
size = 10, colour = "grey76"), legend.title = element_text(family = "Roboto Mono",
size = 14, colour = "grey76"))
g
g1 <- g + transition_time(year) + shadow_mark() + enter_recolor()
animate(g1, renderer = gifski_renderer(), height = 800, width = 1000,
fps = 10)
anim_save("g1_plot.gif")
Code based on link
plot_data <- combine %>% filter(grepl(pattern = "Chargers|Rams",
x = team_name)) %>% filter(year > 2010)
avg_att <- plot_data %>% group_by(team, team_name, year) %>%
summarise(avg_weekly_atd = round(x = mean(x = weekly_attendance,
na.rm = TRUE), digits = 0)) %>% ungroup()
att_th <- 90000
top_att <- plot_data %>% filter(weekly_attendance > att_th) %>%
select(weekly_attendance)
top_att_games <- data.frame(team = character(), team_name = character(),
year = double(), week = double(), weekly_attendance = double())
for (i in seq(from = 1, to = nrow(top_att), by = 1)) {
top_att_games <- rbind(top_att_games, data.frame(combine %>%
filter(weekly_attendance == top_att$weekly_attendance[i]) %>%
select(team, team_name, year, week, weekly_attendance) %>%
arrange(desc(weekly_attendance)) %>% mutate(opp_team = case_when(weekly_attendance ==
lag(x = weekly_attendance, n = 1) ~ lag(x = team, n = 1),
weekly_attendance == lead(x = weekly_attendance, n = 1) ~
lead(x = team, n = 1))) %>% mutate(opp_team_name = case_when(weekly_attendance ==
lag(x = weekly_attendance, n = 1) ~ lag(x = team_name,
n = 1), weekly_attendance == lead(x = weekly_attendance,
n = 1) ~ lead(x = team_name, n = 1))) %>% filter(grepl(pattern = "Chargers|Rams",
x = team_name))))
}
Generate the Plot
plot <- ggplot(data = plot_data) + geom_point(mapping = aes(x = week,
y = weekly_attendance, col = team), shape = 1) + geom_hline(mapping = aes(yintercept = avg_weekly_atd,
col = team), linetype = 2, data = avg_att) + geom_text_repel(mapping = aes(x = 10,
y = 35000, label = paste("Average Weekly Attendance: ", avg_weekly_atd)),
family = "Bahnschrift", size = 3, data = avg_att, seed = 1008,
segment.alpha = 0.4) + geom_text_repel(mapping = aes(x = week,
y = weekly_attendance, label = paste("vs: ", opp_team_name)),
family = "Bahnschrift", size = 2.75, data = top_att_games,
seed = 1008, segment.alpha = 0.4) + facet_wrap(facets = ~team_name) +
ggthemes::theme_tufte(base_size = 12, base_family = "Bahnschrift") +
labs(x = "Game Week", y = "Attendance", col = "Location: ",
title = "Weekly NFL Attendances for Chargers and Rams Games, Year: {closest_state}",
subtitle = "Moving to LA was Deterimental for Attendances at Chargers Games \n and Beneficial for Attendances at Rams Games (Considering 2011 - 2019)",
caption = "Tidy Tuesday 2020, Week 6 | Data from Pro Football Reference | @d73mwf") +
theme(legend.position = "top") + transition_states(states = year,
transition_length = 1, state_length = 5, wrap = FALSE) +
enter_fade() + exit_fade()
animate(plot = plot, fps = 20, duration = 10, end_pause = 3,
width = 750, height = 500)
anim_save(animation = last_animation(), filename = "g2_plot.gif")
sessionInfo()
## R Under development (unstable) (2019-12-19 r77606)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets
## [6] methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-2 extrafont_0.17
## [3] gifski_0.8.6 magrittr_1.5
## [5] gganimate_1.0.5 paletteer_1.0.0
## [7] forcats_0.4.0 stringr_1.4.0
## [9] dplyr_0.8.4 purrr_0.3.3
## [11] readr_1.3.1 tidyr_1.0.2
## [13] tibble_2.1.3 ggplot2_3.2.1
## [15] tidyverse_1.3.0 BiocStyle_2.15.6
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-143 fs_1.3.1
## [3] lubridate_1.7.4 progress_1.2.2
## [5] httr_1.4.1 tools_4.0.0
## [7] backports_1.1.5 utf8_1.1.4
## [9] R6_2.4.1 DBI_1.1.0
## [11] lazyeval_0.2.2 colorspace_1.4-1
## [13] withr_2.1.2 tidyselect_1.0.0
## [15] prettyunits_1.1.1 curl_4.3
## [17] compiler_4.0.0 extrafontdb_1.0
## [19] cli_2.0.1 rvest_0.3.5
## [21] formatR_1.7 xml2_1.2.2
## [23] prismatic_0.2.0 labeling_0.3
## [25] bookdown_0.17 jcolors_0.0.4
## [27] scales_1.1.0 digest_0.6.25
## [29] rmarkdown_2.1 dichromat_2.0-0
## [31] pkgconfig_2.0.3 htmltools_0.4.0
## [33] oompaBase_3.2.9 scico_1.1.0
## [35] dbplyr_1.4.2 maps_3.3.0
## [37] palr_0.2.0 rlang_0.4.4
## [39] readxl_1.3.1 pals_1.6
## [41] rstudioapi_0.11 farver_2.0.3
## [43] generics_0.0.2 jsonlite_1.6.1
## [45] Rcpp_1.0.3 munsell_0.5.0
## [47] fansi_0.4.1 lifecycle_0.1.0
## [49] stringi_1.4.6 yaml_2.2.1
## [51] grid_4.0.0 crayon_1.3.4
## [53] lattice_0.20-38 haven_2.2.0
## [55] mapproj_1.2.7 hms_0.5.3
## [57] knitr_1.28 pillar_1.4.3
## [59] codetools_0.2-16 reprex_0.3.0
## [61] glue_1.3.1 evaluate_0.14
## [63] BiocManager_1.30.10 modelr_0.1.6
## [65] vctrs_0.2.3 tweenr_1.0.1
## [67] Rttf2pt1_1.3.8 cellranger_1.1.0
## [69] gtable_0.3.0 rematch2_2.1.0
## [71] assertthat_0.2.1 xfun_0.12
## [73] broom_0.5.4 cluster_2.1.0